***************************************
* Title: rwanda_jde_tableA18.do
* Author: Todd Pugatch
* Last update: June 10 2024
* Description: analysis for Blimpo and Pugatch, "Entrepreneurship Education
*	and Teacher Training in Rwanda," Stage 2 Registered report, Journal of 
*	Development Economics
* Inputs: 	student_merge_jde.dta
*			headteacher_merge_jde.dta
*			teacher_merge_jde.dta
* Outputs: 	rwanda_jde_tableA18.txt (log file, written to $temp)
*			rwanda_jde_tableA18.xls (regression table passed to outreg2, written to $results)
* Notes: produces Table A18
****************************************

* Record the wall-clock start time (hh:mm:ss); it is displayed at the end of the run
local start = c(current_time)

* Start from a clean session: drop data, matrices, Mata objects, programs and graphs,
* and close any log left open by an earlier run
clear
clear matrix
clear mata
program drop _all
graph drop _all
capture log close

* Session settings
set more off
set scheme s2mono
set autotabgraphs on

* Set directories 
* All paths below are built from the global $main, which must point to the root of
*	the replication package. Define it before running this file (e.g. in a master
*	do-file), or uncomment and edit the line below.
*global main "/Users/dhorvath/Dropbox (MIT)/Data Publication/rwanda_final/rwanda_replication_package"
* Stop early with a clear message if $main is undefined; otherwise every path would
*	silently resolve relative to the filesystem root (e.g. "/01_data/02_clean").
if `"$main"' == "" {
	di as error "global main is not defined: set it to the replication package root before running this do-file"
	exit 198
}
	global rawdata "$main/01_data/01_raw"
	global cleandata "$main/01_data/02_clean"
	global dofiles "$main/02_dofiles"
	global results "$main/03_results"
	global temp "$main/04_output"

* begin log file
log using "$temp/rwanda_jde_tableA18.txt", text replace

* ANALYSIS
/*Goal: produce columns 1-10 of Table P6, but interact treatment with quartiles of
		propensity to start business, to explore heterogeneity in effects
	Uses endline student survey.
	Regress outcome on treatment status, controlling for randomization strata.
		Include interactions with quartiles of Pr(business participation) and
			main effects.
	Cluster s.e.'s by school.*/
* Load the merged student data; the head teacher and teacher variables are attached to it below
quietly use "$cleandata/student_merge_jde.dta", clear

/*STEP 1: ESTIMATE PROPENSITY TO START BUSINESS
	Endline business formation is regressed on baseline characteristics.
		Only the control group is used, so that the outcome is not affected by treatment.
		Covariates follow the variables pre-specified for baseline balance, plus
			baseline business ownership.
	The resulting propensity score is an input for Step 2.*/

* --- head teacher data: variables for balance tests, attached by school ---
local Xht "boarding enroll_s4_m_bl enroll_s4_f_bl teach_us_bl teach_absent_pct_bl electricity_now_bl ht_skillslab_definition_bl ht_pedagogy_active_bl"
merge m:1 school_code using "$cleandata/headteacher_merge_jde.dta", keepusing(`Xht')
drop _merge
* park the student file on disk while the teacher file is prepared
quietly save "$temp/studenttemp.dta", replace

* --- teacher data: variables for balance tests ---
quietly use "$cleandata/teacher_merge_jde.dta", clear
quietly keep if insample_bl==1
* rename so that teacher variables do not collide with student variables of the same name
rename female_bl female_teacher_bl
rename age_bl age_teacher_bl
rename profit_calculation_bl profcalc_teacher_bl
quietly save "$temp/teachertemp.dta", replace

* --- back to the student file; attach the teacher variables by school ---
quietly use "$temp/studenttemp.dta", clear
local Xt "female_teacher_bl age_teacher_bl qualified_bl entre_lessonplan_shown_bl pedagogy_active_bl profcalc_teacher_bl profit_definition_bl otherjob_bl"
merge m:1 school_code using "$temp/teachertemp.dta", keepusing(`Xt')
drop _merge
	
/*student data: variables for balance tests*/
local Xs "female assets_pct_bl moth_primary_ormore_bl repeat_S4_bl S3_exam_bl job_holiday_bl compound_interest_bl anysavings_bl profit_calculation_bl planned_schl_postsec_bl planned_business_bl grit_raw_bl ownbusiness_bl"

* prep missing values for students without baseline outcome
/*For every head teacher, teacher and student covariate, create
	<var>i: copy of <var> with missing (.) values replaced by the control-group mean
	<var>m: indicator equal to 1 where <var> is missing (.)
  The control-group mean is computed BEFORE it is used and held in a temporary
  scalar. Reading r(mean) ahead of -summarize- would pick up whatever the previous
  r-class command left behind (the previous covariate's mean, or nothing on the
  first pass of the loop).*/
tempname mu_ctrl
foreach y0 in `Xht' `Xt' `Xs' {
	qui su `y0' if treatment==0
	scalar `mu_ctrl'=r(mean)
	qui gen `y0'i=`y0'
	qui replace `y0'i=`mu_ctrl' if `y0'==.
	qui gen `y0'm=(`y0'==.)
	lab var `y0'i "`y0', imputing control mean for missing values"
	lab var `y0'm "missing value for `y0'"
}

* collect covariates
/*Each list holds the imputed version (<var>i) of every baseline covariate followed by
	its missing-value indicator (<var>m). The lists overwrite the raw-variable lists of
	the same name and are used as regressors in the propensity model.
  Note: the head teacher list pairs boardingi with boardingm (it previously repeated
	boardingi, which left the missing indicator for boarding out of the model).*/
local Xht "boardingi enroll_s4_m_bli enroll_s4_f_bli teach_us_bli teach_absent_pct_bli electricity_now_bli ht_skillslab_definition_bli ht_pedagogy_active_bli boardingm enroll_s4_m_blm enroll_s4_f_blm teach_us_blm teach_absent_pct_blm electricity_now_blm ht_skillslab_definition_blm ht_pedagogy_active_blm"

local Xt "female_teacher_bli age_teacher_bli qualified_bli entre_lessonplan_shown_bli pedagogy_active_bli profcalc_teacher_bli profit_definition_bli otherjob_bli female_teacher_blm age_teacher_blm qualified_blm entre_lessonplan_shown_blm pedagogy_active_blm profcalc_teacher_blm profit_definition_blm otherjob_blm"

local Xs "femalei assets_pct_bli moth_primary_ormore_bli repeat_S4_bli S3_exam_bli job_holiday_bli compound_interest_bli anysavings_bli profit_calculation_bli planned_schl_postsec_bli planned_business_bli grit_raw_bli ownbusiness_bli femalem assets_pct_blm moth_primary_ormore_blm repeat_S4_blm S3_exam_blm job_holiday_blm compound_interest_blm anysavings_blm profit_calculation_blm planned_schl_postsec_blm planned_business_blm grit_raw_blm ownbusiness_blm"
	
/*Propensity model: Pr(business participation at endline)
	regressors: baseline school, teacher and student characteristics pre-specified
		for balance, plus strata dummies
	estimation sample: control group only
	standard errors clustered by school*/
quietly logit personal_business_el `Xht' `Xt' `Xs' i.strata if treatment==0, vce(cluster school_code)
* predicted probability, computed for every observation (no sample restriction on predict)
quietly predict p, pr

/*overlap check: density of the propensity score, control vs. treatment*/
twoway	(kdensity p if treatment==0)	///
		(kdensity p if treatment==1),	///
	title("Pr(business participation), endline")	///
	xtitle("probability") ytitle("density")	///
	legend(label(1 "control") label(2 "treatment"))

/*split the score into quartiles, build one dummy per quartile (p_q1-p_q4),
	and interact each dummy with treatment (D_p_q1-D_p_q4)*/
quietly xtile p_q = p, nquantiles(4)
quietly tabulate p_q, generate(p_q)
forvalues q = 1/4 {
	label variable p_q`q' "Pr(business participation), quartile `q'"
	quietly generate D_p_q`q' = treatment*p_q`q'
	label variable D_p_q`q' "treatment*Pr(business participation), quartile `q'"
}
	
/*STEP 2: ANALYSIS*/
/*DROPOUT*/
/*Column 1: dropout*/
* dropout comes from the student endline survey (not the head teacher survey, as originally written in JDE Reg. Report Stage 1)
* we do not know whether students who attrited from the survey dropped out or not
* no relevant baseline outcome exists for this column, so none is included

* put the quartile dummies and their interactions first, in order, so that the
* variable ranges used in the locals below are contiguous
order p_q? D_p_q?
* statistic appended to the table by outreg2
local stat ""control mean",mu_c"
* treatment main effect plus interactions with quartiles 2-4 (quartile 1 is the omitted group)
local D "treatment D_p_q2-D_p_q4"
* quartile main effects
local X0 "p_q2-p_q4"

* strata absorbed as fixed effects; standard errors clustered by school
quietly areg dropout `D' `X0', absorb(strata) vce(cluster school_code)
* control-group mean of the outcome within the estimation sample
quietly summarize dropout if treatment==0 & e(sample)
scalar mu_c = r(mean)
* first column: start (replace) the output table
outreg2 `D' `X0' using "$results/rwanda_jde_tableA18.xls", se excel nolabel nocons addstat(`stat') replace

/*ECONOMIC ACTIVITY*/	
* winsorize all financial variables used in analysis
/*For each endline (el) and baseline (bl) financial variable <x>_<wave>, create
	<x>w_<wave>, winsorized at the top only (p(.01) highonly), using the
	user-written -winsor- command.*/
local fin_el "earn_last2mths_usd profits_last2mths_adj_usd earn_alt_adj_usd"
local fin_bl "earn_last2mths_usd business_inc_last2mths_usd"
foreach w in el bl {
	foreach x in `fin_`w'' {
		winsor `x'_`w', gen(`x'w_`w') p(.01) highonly
		lab var `x'w_`w' "`x'_`w', winsorized at 99th percentile"
	}
}

* prep missing values for students without baseline outcome
/*first, impute zero for (winsorized) business income for those who don't report owning a business 
	(original version conditioned on business ownership)*/
qui replace business_inc_last2mths_usdw_bl=0 if ownbusiness_bl==0

/*now impute control group mean to remaining missing values
	<var>i: copy of <var> with missing (.) values replaced by the control-group mean
	<var>m: indicator equal to 1 where <var> is missing (.)
  The control-group mean is computed BEFORE it is used and held in a temporary
  scalar. Reading r(mean) ahead of -summarize- would pick up the result left by the
  previous r-class command (i.e. the previous variable's mean).*/
local Y0 "buspartners_school_bl buspartners_family_bl ownbusiness_nonag_bl earn_money_bl earn_last2mths_usdw_bl business_inc_last2mths_usdw_bl"
tempname mu_ctrl0
foreach y0 in `Y0' {
	qui su `y0' if treatment==0
	scalar `mu_ctrl0'=r(mean)
	qui gen `y0'i=`y0'
	qui replace `y0'i=`mu_ctrl0' if `y0'==.
	qui gen `y0'm=(`y0'==.)
	lab var `y0'i "`y0', imputing control mean for missing values"
	lab var `y0'm "missing value for `y0'"
}

/*Columns 2-8: business participation, characteristics, employment*/
/*outcomes, in column order; local y0<k> names the baseline control used in column k*/
local Y "personal_business_el business_solo_el business_sbc_el business_famorpeers_el businesstype_nonag_el	business_employs_el	employed_el"
local y02 "ownbusiness_bl"
local y03 "ownbusiness_bl"
local y04 "buspartners_school_bl"
local y05 "buspartners_family_bl"
local y06 "ownbusiness_nonag_bl"
local y07 "ownbusiness_bl"
local y08 "earn_money_bl"

/*regressions: columns 2-8
	each outcome is regressed on treatment, the quartile interactions and main effects,
	and the imputed baseline control with its missing-value indicator; strata are
	absorbed and standard errors are clustered by school*/
local stat ""control mean",mu_c,"baseline mean",mu_0"
local i=2 /*column counter, starts at column 2*/
foreach y in `Y' {
	quietly areg `y' `D' `X0' `y0`i''i `y0`i''m, absorb(strata) vce(cluster school_code)
	* control-group mean of the outcome within the estimation sample
	quietly summarize `y' if treatment==0 & e(sample)
	scalar mu_c = r(mean)
	* mean of the raw (non-imputed) baseline control within the estimation sample
	quietly summarize `y0`i'' if e(sample)
	scalar mu_0 = r(mean)
	outreg2 `D' `X0' using "$results/rwanda_jde_tableA18.xls", se excel nolabel nocons addstat(`stat') append
	local ++i
}

/*Columns 9-10: income and business profits*/
/*the third outcome is an alternative measure for column 9, included as a robustness check*/
/*outcomes, in column order; local y0<k> names the baseline control used in column k*/
local Y "earn_last2mths_usdw_el	profits_last2mths_adj_usdw_el earn_alt_adj_usdw_el"
local y09 "earn_last2mths_usdw_bl"
local y010 "business_inc_last2mths_usdw_bl"
local y011 "earn_last2mths_usdw_bl"
local i=9 /*column counter, starts at column 9*/
foreach y in `Y' {
	* same specification as the earlier columns: strata absorbed, s.e.'s clustered by school
	quietly areg `y' `D' `X0' `y0`i''i `y0`i''m, absorb(strata) vce(cluster school_code)
	* control-group mean of the outcome within the estimation sample
	quietly summarize `y' if treatment==0 & e(sample)
	scalar mu_c = r(mean)
	* mean of the raw (non-imputed) baseline control within the estimation sample
	quietly summarize `y0`i'' if e(sample)
	scalar mu_0 = r(mean)
	outreg2 `D' `X0' using "$results/rwanda_jde_tableA18.xls", se excel nolabel nocons addstat(`stat') append
	local ++i
}


* remove the intermediate datasets that were saved for the merges
foreach f in studenttemp teachertemp {
	erase "$temp/`f'.dta"
}
* show start and end times of the run, then close the log
local end = c(current_time)
display "`start'"
display "`end'"
log close
